%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define data and specification for skillbias_main
% this version for publication: October 2013
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [data,nshocks,pndx,hndx,spndx,pricendx,wLndx,relendx,T,num_vars,frac_scalar] = data_spec_pub(...
    price,wage_premium,prod,hours,rel_supply,rel_empl,wagelow,irate,investment,consumption,spec,nlagsvar,prod_choice,do_break,break_choice,sample,do_level,trendbreak)
% DATA_SPEC_PUB  Cut the sample window, transform the series and assemble the VAR data matrix.
%
% Series inputs (indexed by row over the sample window):
%   prod, hours, investment, consumption
%                     first-differenced and multiplied by 100
%   price             first-differenced, sign flipped, multiplied by 100
%   wage_premium, rel_supply, rel_empl, wagelow
%                     first-differenced column by column, multiplied by 100
%   irate             used undifferenced
%   hours             additionally kept undifferenced (used when do_level == 1)
%
% Selector inputs:
%   spec          VAR specification; supported values: 1 2 3 5 6 9 25..32
%   nlagsvar      number of VAR lags (only used to compute T)
%   prod_choice   must be 0; no other productivity transformation is implemented
%   do_break      1 = detrend hours before building the VAR, otherwise leave as is
%   break_choice  detrending method when do_break == 1:
%                   0      regression on regime dummies defined by trendbreak
%                   1      hpfilter(.,6400) trend removed from undifferenced hours only
%                   2      bpass(.,52,10e15) third output removed from both hours series
%                   31-33  regression on a polynomial time trend of order 1-3
%                 any other value leaves both hours series untouched
%   sample        sample window; supported values: 1 2 5 6 7.
%                 Samples 6 and 7 only build productivity, hours and price, so
%                 they can only be combined with spec == 1.
%   do_level      1 = undifferenced hours enter the VAR, otherwise differenced hours
%   trendbreak    break positions (read along its second dimension) for break_choice == 0
%
% Outputs:
%   data          matrix with one column per VAR variable
%   nshocks       number of identified shocks for the chosen spec
%   pndx, hndx, spndx, pricendx, wLndx, relendx
%                 column of productivity, hours, premium, price, low wage and
%                 relative employment in data; 0 when no index is assigned
%   T             size(data,1) - nlagsvar
%   num_vars      number of columns of data
%   frac_scalar   column mean of exp(rel_empl)./(1+exp(rel_empl)) over the
%                 sample window; 0 for samples 6 and 7
%
% Errors (raised before any data is touched):
%   data_spec_pub:unknownSample, data_spec_pub:unknownSpec,
%   data_spec_pub:unsupportedProdChoice, data_spec_pub:specNeedsSkillData

% ---- validate the selectors so that bad input fails with a clear message
% ---- instead of an "undefined variable" error further down
if ~isscalar(sample) || ~any(sample == [1 2 5 6 7])
    error('data_spec_pub:unknownSample', ...
        'Unsupported sample selector; supported values are 1, 2, 5, 6 and 7.');
end
if ~isscalar(spec) || ~any(spec == [1 2 3 5 6 9 25:32])
    error('data_spec_pub:unknownSpec', ...
        'Unsupported spec selector; supported values are 1, 2, 3, 5, 6, 9 and 25 to 32.');
end
has_skill_data = sample <= 5;
if ~has_skill_data && spec ~= 1
    error('data_spec_pub:specNeedsSkillData', ...
        'spec = %g needs series that are not built for sample = %g; use spec = 1 or sample <= 5.', ...
        spec, sample);
end

% ---- sample window
start = 1;
switch sample
    case 1
        samplesize = 87;
    case 2
        samplesize = 109;
    case 5
        samplesize = 100;
    case 6
        samplesize = 184;
    case 7
        samplesize = 184;
        start = 29;
end
win = start:start+samplesize;       % rows entering the differences
lev = start+1:start+samplesize;     % rows of undifferenced series aligned with the differences

% ---- impose sample, difference data and multiply by 100 since numbers are
% ---- very small (the scaling is NOT undone inside this function)
if prod_choice == 0
    d_prod = diff(prod(win))*100;
else
    % every specification needs d_prod and no alternative construction exists
    error('data_spec_pub:unsupportedProdChoice', ...
        'Only prod_choice = 0 is implemented.');
end
d_hours = diff(hours(win))*100;
l_hours = hours(lev);
d_price = -diff(price(win))*100;
if has_skill_data
    d_prem = diff(wage_premium(win,:))*100;
    d_rels = diff(rel_supply(win,:))*100;
    d_rele = diff(rel_empl(win,:))*100;
    d_wL = diff(wagelow(win,:))*100;
    interest = irate(lev);
    d_inv = diff(investment(win))*100;
    d_cons = diff(consumption(win))*100;

    % logistic transform of rel_empl, averaged over the window
    rel_series = rel_empl(win,:);
    rel_level = exp(rel_series)./(1+exp(rel_series));
    frac_scalar = mean(rel_level);
else
    frac_scalar = 0;
end

% ---- detrending the hours series
if do_break == 1
    T = size(d_hours,1);
    dd = (1:T)';
    switch break_choice
        case 0
            % one dummy column per regime delimited by trendbreak
            c = size(trendbreak,2)+1;
            dum = zeros(T,c);
            dum(1:trendbreak(1),1) = 1;
            for j = 2:c-1
                dum(trendbreak(j-1)+1:trendbreak(j),j) = 1;
            end
            dum(trendbreak(c-1)+1:end,c) = 1;
            [~,~,resid] = regress(d_hours,dum);
            d_hours = resid;
            [~,~,lresid] = regress(l_hours,dum);
            l_hours = lresid;
        case 1
            % only the undifferenced series is filtered here
            hours_trend = hpfilter(l_hours,6400);
            l_hours = l_hours-hours_trend;
        case 2
            [~,~,hours_trend] = bpass(l_hours,52,10e15);
            l_hours = l_hours-hours_trend;
            [~,~,hoursd_trend] = bpass(d_hours,52,10e15);
            d_hours = d_hours-hoursd_trend;
        case {31,32,33}
            % polynomial time trend of order break_choice-30
            xx = ones(T,1);
            for p = 1:break_choice-30
                xx = [xx dd.^p]; %#ok<AGROW>
            end
            [~,~,resid] = regress(l_hours,xx);
            l_hours = resid;
            [~,~,dresid] = regress(d_hours,xx);
            d_hours = dresid;
    end
end
if do_level == 1
    x_hours = l_hours;
else
    x_hours = d_hours;
end

% ---- variables: every index defaults to 0 and is overwritten per spec
pndx = 0; hndx = 0; spndx = 0; pricendx = 0; wLndx = 0; relendx = 0;
switch spec
    case 1
        disp('Performing Gali identification of technology shocks');
        disp('Variables in the VAR: productivity, hours');
        data = [d_prod x_hours];
        nshocks = 1; % number of identified shocks
        pndx = 1; hndx = 2;
    case 2
        disp('Performing Gali identification of technology shocks');
        disp('Variables in the VAR: productivity, hours, premium, relative employment');
        data = [d_prod x_hours d_prem d_rele];
        nshocks = 1;
        pndx = 1; hndx = 2; spndx = 3; relendx = 4;
    case 3
        disp('Performing Gali identification of technology shocks');
        disp('Variables in the VAR: productivity, hours, premium, price');
        data = [d_prod x_hours d_prem d_price];
        nshocks = 1;
        pndx = 1; hndx = 2; spndx = 3; pricendx = 4;
    case 5
        disp('Performing Fisher identification of i-spec and i-neutral technology shocks');
        disp('Variables in the VAR: price, productivity, premium, hours');
        data = [d_price d_prod d_prem x_hours];
        nshocks = 2;
        pndx = 2; hndx = 4; spndx = 3; pricendx = 1;
    case 6
        disp('Performing SBT identification of SBT and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, productivity, hours');
        data = [d_prem d_prod x_hours];
        nshocks = 2;
        pndx = 2; hndx = 3; spndx = 1;
    case 9
        disp('Performing SBT identification of SBT and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, productivity, hours, relative employment');
        data = [d_prem d_prod x_hours d_rele];
        nshocks = 2;
        pndx = 2; hndx = 3; spndx = 1; relendx = 4;
    case 25
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, rel. hours, productivity, hours');
        data = [d_prem d_rele d_prod x_hours];
        nshocks = 3;
        pndx = 3; hndx = 4; spndx = 1; relendx = 2;
    case 26
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, rel. hours, productivity, hours, wage low');
        data = [d_prem d_rele d_prod x_hours d_wL];
        nshocks = 3;
        pndx = 3; hndx = 4; spndx = 1; wLndx = 5; relendx = 2;
    case 27
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, rel. hours, productivity, hours, rel. supply');
        data = [d_prem d_rele d_prod x_hours d_rels];
        nshocks = 3;
        pndx = 3; hndx = 4; spndx = 1; relendx = 2;
    case 28
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, rel. hours, productivity, hours, price');
        data = [d_prem d_rele d_prod x_hours d_price];
        nshocks = 3;
        pndx = 3; hndx = 4; spndx = 1; pricendx = 5; relendx = 2;
    case 29
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, rel. hours, productivity, hours, price, investment');
        data = [d_prem d_rele d_prod x_hours d_price d_inv];
        nshocks = 3;
        pndx = 3; hndx = 4; spndx = 1; pricendx = 5; relendx = 2;
    case 30
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, rel. hours, productivity, hours, price, investment, consumption');
        data = [d_prem d_rele d_prod x_hours d_price d_inv d_cons];
        nshocks = 3;
        pndx = 3; hndx = 4; spndx = 1; pricendx = 5; relendx = 2;
    case 31
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: premium, rel. hours, productivity, hours, price, investment, consumption, interest rate');
        data = [d_prem d_rele d_prod x_hours d_price d_inv d_cons interest];
        nshocks = 3;
        pndx = 3; hndx = 4; spndx = 1; pricendx = 5; relendx = 2;
    case 32
        disp('Performing SBT identification of SBT, supply and skill-neutral technology shocks');
        disp('Variables in the VAR: price, premium, rel. hours, productivity, hours');
        data = [d_price d_prem d_rele d_prod x_hours];
        nshocks = 4;
        pndx = 4; hndx = 5; spndx = 2; pricendx = 1; relendx = 3;
end
T = size(data,1)-nlagsvar;  % sample after allowing for lag structure

num_vars = size(data,2);
